This module extracts and describes the Census data to be used for Model Evaluation.
The raw data files were downloaded in geojson format from the American Community Survey 2019 - 5 year survey. The ultimate data source is the US Census Bureau. The data distributor is censusreporter.org
library(tidyverse)
library(kableExtra)
library(sf)
library(stars)
library(ggplot2)
library(ggspatial)
library(viridis)
library(leaflet)
library(classInt)
library(RColorBrewer)Let’s load the Raleigh Corporate Limits. By superimposing the city limits with census data, we can verify that the city is fully covered by the latter.
raleigh_corporate_limits_file = paste0(data_dir, "Corporate_Limits.geojson")
# Load the geojson files
# change the coordinate system to 2264
# filter objects that belong to RALEIGHT
# --------------------------------------------
gj_raleigh <- sf::st_read(raleigh_corporate_limits_file, quiet = TRUE) %>%
st_transform(2264) %>%
filter( SHORT_NAME == 'RAL' )Load the poverty geojson census data.
pov_root = "acs2019_5yr_B17017_15000US371830529022"
poverty_B17017_raw = st_read( paste0(data_dir, pov_root, "/", pov_root, ".geojson" )
, quiet = TRUE ) %>% st_transform(2264)Extract the regional average poverty rate.
regional_avg_poverty_rate = poverty_B17017_raw %>%
filter( geoid == "31000US39580") %>%
mutate( rate = B17017002 / B17017001) %>%
pull(rate)Impute any missing poverty rate regions with the median rate for the entire Raleigh-Cary Metro Area.
poverty_B17017_raw %>%
select( geoid, name, B17017001, B17017002 ) %>%
filter( name != 'Raleigh-Cary, NC Metro Area') %>%
mutate( poverty_rate = ifelse( B17017001 == 0, regional_avg_poverty_rate , B17017002 / B17017001) ) %>%
mutate( area = st_area(.)) -> gj_povertypal_fun = colorQuantile("Spectral", NULL , n = 6)
gj_poverty_leaf = st_transform(gj_poverty, 4326)
p_popup <- paste0("Poverty: ", format(round(gj_poverty$poverty_rate, 3), nsmall = 3 ), "<br>" ,
"Name: ", gj_poverty$name , "<br>" ,
"Geoid: ", gj_poverty$geoid, "<br>" )
# get quantile breaks. Add .00001 offset to catch the lowest value
breaks_qt <- classIntervals(c(min(gj_poverty$poverty_rate) - .00001,
gj_poverty$poverty_rate), n = 6, style = "quantile")
breaks_qt## style: quantile
## [-1e-05,0.02073365) [0.02073365,0.04442344) [0.04442344,0.07557252)
## 104 104 104
## [0.07557252,0.1113514) [0.1113514,0.1808219) [0.1808219,0.6422414]
## 104 104 105
leaflet( gj_poverty_leaf ) %>% addPolygons( stroke = FALSE, # Remove polygon borders
fillColor = ~pal_fun(poverty_rate), # set fill color with function from above and value
fillOpacity = 0.5 ,
smoothFactor = 0.5, # make it nicer
popup = p_popup
) %>% addTiles() %>%
addLegend( "bottomright", # Location
colors = brewer.pal(6, "Spectral") ,
labels = paste0("up to ", format(breaks_qt$brks[-1], digits = 2)) ,
title = "Raleigh Poverty rate by Census Block Group"
)Table B23025 contains Employment Status for the Population 16 years and over. The relevant field is Unemployed % from the civilian labor force.
unemp_root = "acs2019_5yr_B23025_15000US371830529022"
unemp_B23025_raw = st_read( paste0(data_dir, unemp_root, "/", unemp_root, ".geojson" )
, quiet = TRUE ) %>% st_transform(2264)Extract the regional average unemployment rate for imputation of regions with undefined unemployment rates.
regional_avg_unemp_rate = unemp_B23025_raw %>%
filter( geoid == "31000US39580") %>%
mutate( rate = B23025005 / B23025001) %>%
pull( rate )Impute any missing poverty rate regions with the median rate for the entire Raleigh-Cary Metro Area.
unemp_B23025_raw %>%
select( geoid, name, B23025005, B23025001 ) %>%
filter( name != 'Raleigh-Cary, NC Metro Area') %>%
mutate( unemployment_rate = ifelse( B23025001 == 0, regional_avg_unemp_rate , B23025005 / B23025001) ) %>%
mutate( area = st_area(.)) -> gj_unempgj_unemp_leaf = st_transform(gj_unemp, 4326)
p_popup <- paste0("Unemp: ", format(round(100 * gj_unemp$unemployment_rate, 1), nsmall = 1 ), "<br>" ,
"Name: ", gj_unemp$name , "<br>" ,
"Geoid: ", gj_unemp$geoid, "<br>" )
# get quantile breaks. Add .00001 offset to catch the lowest value
breaks_qt <- classIntervals(c(min(gj_unemp$unemployment_rate) - .00001,
gj_unemp$unemployment_rate), n = 6, style = "quantile")
breaks_qt## style: quantile
## [-1e-05,0.002762431) [0.002762431,0.01464605) [0.01464605,0.02296588)
## 104 103 105
## [0.02296588,0.03476483) [0.03476483,0.05166586) [0.05166586,0.1609695]
## 104 104 105
leaflet( gj_unemp_leaf ) %>% addPolygons( stroke = FALSE, # Remove polygon borders
fillColor = ~pal_fun(unemployment_rate), # set fill color with function from above and value
fillOpacity = 0.5 ,
smoothFactor = 0.5, # make it nicer
popup = p_popup
) %>% addTiles() %>%
addLegend( "bottomright", # Location
colors = brewer.pal(6, "Spectral") ,
labels = paste0("up to ", format(breaks_qt$brks[-1], digits = 2)) ,
title = "Raleigh Unemployment By Block Group"
)ggplot() + geom_sf(data=gj_unemp , aes( fill = unemployment_rate ) ) +
scale_fill_viridis_c( alpha = .4) +
annotation_map_tile(type="osm", progress = "none", alpha = 0.5 ) Table B19013 contains Median Household Income . The relevant field is Unemployed % from the civilian labor force.
income_root = "acs2019_5yr_B19013_15000US371830523011"
income_B19013_raw = st_read( paste0(data_dir, income_root, "/", income_root, ".geojson" )
, quiet = TRUE ) %>% st_transform(2264)Extract the regional average unemployment rate for imputation of regions with undefined unemployment rates.
regional_avg_income = 67266 # Median household income for Raleigh city in 2019 inflated adjusted dollarsImpute any missing median household income with the regional median household income for the entire Raleigh-Cary Metro Area.
income_B19013_raw %>% select( geoid, name, B19013001 ) %>%
filter( str_length(geoid) == 19 ) %>% # Filter out the higher level aggregate regions shown by shorter geoid like Cary, NC, Raleigh, NC, etc.
mutate( income = ifelse( is.na(B19013001) | B19013001 == 0, regional_avg_income , B19013001) ) %>%
mutate( area = st_area(.)) -> gj_incomegj_income_leaf = st_transform(gj_income, 4326)
p_popup <- paste0("Income: $", format(round( gj_income$income, 0), nsmall = 0 ), "<br>" ,
"Name: ", gj_income$name , "<br>" ,
"Geoid: ", gj_income$geoid, "<br>" )
# get quantile breaks. Add .00001 offset to catch the lowest value
breaks_qt <- classIntervals(c(min(gj_income$income) - .00001,
gj_income$income), n = 6, style = "quantile")
breaks_qt## style: quantile
## [22000,41870.83) [41870.83,56457.33) [56457.33,67266) [67266,81388.33)
## 41 41 36 46
## [81388.33,100843.3) [100843.3,232955]
## 41 41
leaflet( gj_income_leaf ) %>% addPolygons( stroke = FALSE, # Remove polygon borders
fillColor = ~pal_fun(income), # set fill color with function from above and value
fillOpacity = 0.5 ,
smoothFactor = 0.5, # make it nicer
popup = p_popup
) %>% addTiles() %>%
addLegend( "bottomright", # Location
colors = brewer.pal(6, "Spectral") ,
labels = paste0("up to ", format(breaks_qt$brks[-1], digits = 2)) ,
title = "Raleigh Median Household Income"
)